* Session setup: start from an empty Stata session, turn off output paging
* so the script runs unattended, and set the variable limit to 10000.
clear all
set more off
set maxvar 10000


*******************************************************
* Black-White Catch Up Between Early and Late Cohorts *
*******************************************************

*------------------*
* RANK-RANK
*------------------*

	* Load the pooled analysis data and keep only the baseline sample.
	use "$Mydirectory1/3_Output/2_PooledData_analysis.dta", clear
	keep if baseline_sample==1

	* Replace the labels of the two rank variables with a single blank.
	label var rank_son_baseline " "
	label var rank_father_baseline " "

	* Cohort flags: decade values 1910-1920 are "early", 1940-1950 are "late".
	* Observations in neither window are dropped.
	gen early = (decade>=1910 & decade<=1920)
	gen late = (decade>=1940 & decade<=1950)
	keep if early==1 | late==1

	* Decade-adjusted ranks. Within each period x race cell:
	*   adjusted value = residual from a weighted regression of the rank on
	*                    decade indicators + the cell's weighted mean rank.
	* NOTE: despite the yhat_ prefix these hold residual + mean, not fitted values.
	gen yhat_fam = .
	gen yhat_father = .

	foreach name in early late {
		forval r = 1/2 {
			* Sample restriction shared by every command in this iteration.
			local cell "`name'==1 & race==`r'"

			* Son's rank. The residual lives in a temporary variable that is
			* dropped right away so it is not written into the saved dataset.
			tempvar resid
			quietly reg rank_son_baseline i.decade if `cell' [aw=wgt_sex_race]
			quietly predict `resid' if `cell', residuals
			quietly sum rank_son_baseline if `cell' [aw=wgt_sex_race]
			replace yhat_fam = `resid' + `r(mean)' if `cell'
			drop `resid'

			* Father's rank, same procedure.
			tempvar resid
			quietly reg rank_father_baseline i.decade if `cell' [aw=wgt_sex_race]
			quietly predict `resid' if `cell', residuals
			quietly sum rank_father_baseline if `cell' [aw=wgt_sex_race]
			replace yhat_father = `resid' + `r(mean)' if `cell'
			drop `resid'
		}
	}

	* Overwrite the original rank variables with their adjusted versions so
	* the downstream commands can keep using the original variable names.
	replace rank_son_baseline = yhat_fam
	replace rank_father_baseline = yhat_father

	* Keep a copy of the adjusted micro data for the regressions run later.
	tempfile fulldata
	save `fulldata'
    
* Binned scatter points: one binscatter call per period x race cell. Each
* call writes its points to binscatter_r<race>_<period>.csv in the working
* directory.
    foreach name in early late {
        forvalues r = 1/2 {
            local stub "binscatter_r`r'_`name'"
            binscatter rank_son_baseline rank_father_baseline if `name'==1 & race==`r' [aw=wgt_sex_race],  nq(20) savedata("`stub'") replace
        }
    }

* Convert each exported CSV to a .dta file tagged with its race and period.
    foreach name in early late {
        forvalues r = 1/2 {
            local stub "binscatter_r`r'_`name'"
            insheet using "`stub'.csv", clear
            gen race=`r'
            gen period="`name'"
            save "`stub'.dta", replace
        }
    }

* Stack the four cell files into a single plotting dataset.
    use  "binscatter_r1_early", clear
    foreach piece in r2_early r1_late r2_late {
        append using "binscatter_`piece'"
    }

* Regression lines: estimate the weighted regression of son's rank on father's
* rank on the full micro data for each period x race cell, then attach the
* slope and intercept to the matching binned scatter points.
    gen slope=.
    gen intercept=.

* Load the micro data once (instead of once per cell) and carry the
* coefficients back in scalars, which are unaffected by restore.
    preserve
        use `fulldata', clear
        foreach name in early late {
            forval r=1/2 {
                quietly reg rank_son_baseline rank_father_baseline if `name'==1 & race==`r' [aw=wgt_sex_race]
                scalar rankfit_b_`name'_`r' = _b[rank_father_baseline]
                scalar rankfit_a_`name'_`r' = _b[_cons]
            }
        }
    restore

* Back in the binned data: copy each cell's coefficients onto its rows.
    foreach name in early late {
        forval r=1/2 {
            replace slope = scalar(rankfit_b_`name'_`r') if period=="`name'" & race==`r'
            replace intercept = scalar(rankfit_a_`name'_`r') if period=="`name'" & race==`r'
            scalar drop rankfit_b_`name'_`r' rankfit_a_`name'_`r'
        }
    }

* Generate predicted values for each point using slope and intercept
    gen yhat = intercept + (slope*rank_father_baseline)
    
    * Variables plotted on the vertical and horizontal axes.
    local yv "rank_son_baseline"
    local xv "rank_father_baseline"

    * Figure: binned points plus dashed regression line for each
    * race x period cell; only the four scatter series appear in the legend.
    twoway (scatter `yv' `xv' if race==1 & period=="early", m(oh) mc(midblue)) ///
           (line yhat `xv' if race==1 & period=="early", lp(dash) lc(midblue)) ///
           (scatter `yv' `xv' if race==2 & period=="early", m(+) mc(orange_red*0.8)) ///
           (line yhat `xv' if race==2 & period=="early", lp(dash) lc(orange_red*0.8)) ///
           (scatter `yv' `xv' if race==1 & period=="late", m(dh) mc(navy)) ///
           (line yhat `xv' if race==1 & period=="late", lp(dash) lc(navy)) ///
           (scatter `yv' `xv' if race==2 & period=="late", m(t) mc(cranberry)) ///
           (line yhat `xv' if race==2 & period=="late", lp(dash) lc(cranberry)), ///
        legend(on order(1 3 5 7) label(1 "White, 1910-1929") label(3 "Black, 1910-1929") ///
               label(5 "White, 1940-1959") label(7 "Black, 1940-1959") ring(0) pos(5)) ///
        ylabel(15(10)65, axis(1)) yscale(r(20 65)) ///
        xti(" " "Predicted parental rank") yti("Respondent rank" " ")

    * Write the figure to disk.
    graph export "$Mydirectory2/main_figures_tables/figure3_rank.pdf", replace


* Clean up: delete the intermediate binscatter files from the working
* directory. -cap- keeps the script going when a file does not exist.
    foreach stem in r1_early r1_late r2_early r2_late all_early all_late {
        foreach ext in csv do dta csv.do {
            cap rm "binscatter_`stem'.`ext'"
        }
    }


*-----------------------------------------------------------------------------*
*-----------------------------------------------------------------------------*

*------------------*
* IGE VERSION 
*------------------*

	* Load the pooled analysis data and keep only the baseline sample.
	use "$Mydirectory1/3_Output/2_PooledData_analysis.dta", clear
	keep if baseline_sample==1

	* Cohort flags: decade values 1910-1920 are "early", 1940-1950 are "late".
	* Observations in neither window are dropped.
	gen early = (decade>=1910 & decade<=1920)
	gen late = (decade>=1940 & decade<=1950)
	keep if early==1 | late==1

	* Decade-adjusted log outcomes. Within each period x race cell:
	*   adjusted value = residual from a weighted regression of the log
	*                    variable on decade indicators + the cell's weighted mean.
	* NOTE: despite the yhat_ prefix these hold residual + mean, not fitted values.
	gen yhat_fam = .
	gen yhat_father = .

	foreach name in early late {
		forval r = 1/2 {
			* Sample restriction shared by every command in this iteration.
			local cell "`name'==1 & race==`r'"

			* Son's log outcome. The residual lives in a temporary variable that
			* is dropped right away so it is not written into the saved dataset.
			tempvar resid
			quietly reg log_son_baseline i.decade if `cell' [aw=wgt_sex_race]
			quietly predict `resid' if `cell', residuals
			quietly sum log_son_baseline if `cell' [aw=wgt_sex_race]
			replace yhat_fam = `resid' + `r(mean)' if `cell'
			drop `resid'

			* Father's log outcome, same procedure.
			tempvar resid
			quietly reg log_father_baseline i.decade if `cell' [aw=wgt_sex_race]
			quietly predict `resid' if `cell', residuals
			quietly sum log_father_baseline if `cell' [aw=wgt_sex_race]
			replace yhat_father = `resid' + `r(mean)' if `cell'
			drop `resid'
		}
	}

	* Overwrite the original log variables with their adjusted versions so
	* the downstream commands can keep using the original variable names.
	replace log_son_baseline = yhat_fam
	replace log_father_baseline = yhat_father

	* Keep a copy of the adjusted micro data for the regressions run later.
	tempfile fulldata
	save `fulldata'
             
    
* Binned scatter points: one binscatter call per period x race cell. Each
* call writes its points to binscatter_r<race>_<period>.csv in the working
* directory.
    foreach name in early late {
        forvalues r = 1/2 {
            local stub "binscatter_r`r'_`name'"
            binscatter log_son_baseline log_father_baseline if `name'==1 & race==`r' [aw=wgt_sex_race], nq(20) savedata("`stub'") replace
        }
    }

* Convert each exported CSV to a .dta file tagged with its race and period.
    foreach name in early late {
        forvalues r = 1/2 {
            local stub "binscatter_r`r'_`name'"
            insheet using "`stub'.csv", clear
            gen race=`r'
            gen period="`name'"
            save "`stub'.dta", replace
        }
    }

* Stack the four cell files into a single plotting dataset.
    use  "binscatter_r1_early", clear
    foreach piece in r2_early r1_late r2_late {
        append using "binscatter_`piece'"
    }

* Regression lines: estimate the weighted regression of son's log outcome on
* father's log outcome on the full micro data for each period x race cell,
* then attach the slope and intercept to the matching binned scatter points.
    gen slope=.
    gen intercept=.

* Load the micro data once (instead of once per cell) and carry the
* coefficients back in scalars, which are unaffected by restore.
* The regressions run quietly, as in the rank-rank section.
    preserve
        use `fulldata', clear
        foreach period in early late {
            foreach race in 1 2 {
                quietly reg log_son_baseline log_father_baseline if race==`race' & `period'==1 [aw=wgt_sex_race]
                scalar igefit_b_`period'_`race' = _b[log_father_baseline]
                scalar igefit_a_`period'_`race' = _b[_cons]
            }
        }
    restore

* Back in the binned data: copy each cell's coefficients onto its rows.
    foreach period in early late {
        foreach race in 1 2 {
            replace slope = scalar(igefit_b_`period'_`race') if race==`race' & period=="`period'"
            replace intercept = scalar(igefit_a_`period'_`race') if race==`race' & period=="`period'"
            scalar drop igefit_b_`period'_`race' igefit_a_`period'_`race'
        }
    }

* Generate predicted values for each point using slope and intercept
    gen yhat = intercept + (slope*log_father_baseline)
    
    * Variables plotted on the vertical and horizontal axes.
    local yv "log_son_baseline"
    local xv "log_father_baseline"

    * Figure: binned points plus dashed regression line for each
    * race x period cell; only the four scatter series appear in the legend.
    twoway (scatter `yv' `xv' if race==1 & period=="early", m(oh) mc(midblue)) ///
           (line yhat `xv' if race==1 & period=="early", lp(dash) lc(midblue)) ///
           (scatter `yv' `xv' if race==2 & period=="early", m(+) mc(orange_red*0.8)) ///
           (line yhat `xv' if race==2 & period=="early", lp(dash) lc(orange_red*0.8)) ///
           (scatter `yv' `xv' if race==1 & period=="late", m(dh) mc(navy)) ///
           (line yhat `xv' if race==1 & period=="late", lp(dash) lc(navy)) ///
           (scatter `yv' `xv' if race==2 & period=="late", m(t) mc(cranberry)) ///
           (line yhat `xv' if race==2 & period=="late", lp(dash) lc(cranberry)), ///
        legend(on order(1 3 5 7) label(1 "White, 1910-1929") label(3 "Black, 1910-1929") ///
               label(5 "White, 1940-1959") label(7 "Black, 1940-1959") ring(0) pos(5)) ///
        ylabel(7.5(0.5)9, axis(1)) yscale(r(7.5 9.25)) ///
        xti(" " "Predicted parental logged income") yti("Respondent logged income" " ") ///
        xlabel(6(0.5)9.5, axis(1)) xscale(r(6 9.5))

    * Write the figure to disk.
    graph export "$Mydirectory2/main_figures_tables/figure3_ige.pdf", replace


* Clean up: delete the intermediate binscatter files from the working
* directory. -cap- keeps the script going when a file does not exist.
    foreach stem in r1_early r1_late r2_early r2_late {
        foreach ext in csv do dta csv.do {
            cap rm "binscatter_`stem'.`ext'"
        }
    }